{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "rng = np.random.RandomState(1991)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 借助sklearn中cv保存各个模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.cross_validation import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "digits = load_digits(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = digits['data']\n",
    "y = digits['target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "xgb_models = []\n",
    "for train_index, test_index in kf:\n",
    "    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index]) #二分类\n",
    "    xgb_models.append(xgb_model)\n",
    "    preds = xgb_model.predict(X[test_index])\n",
    "    labels = y[test_index]\n",
    "    err = sum(1 for i in range(len(preds))\n",
    "              if int(preds[i]>0.5) != labels[i]) / float(len(preds))\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(xgb_models)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多分类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "from sklearn.cross_validation import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def check_pred(preds, labels):\n",
    "    err = sum(1 for i in range(len(preds))\n",
    "              if int(preds[i] > 0.5) != labels[i]) / float(len(preds))\n",
    "#     assert err < 0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "iris = load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = iris['data']\n",
    "y = iris['target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(150,)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for train_index, test_index in kf:\n",
    "    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])\n",
    "    preds = xgb_model.predict(X[test_index])\n",
    "    # 测试几个predict()中的参数\n",
    "    preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)\n",
    "    preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)\n",
    "    preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)\n",
    "    labels = y[test_index]\n",
    "    \n",
    "    check_pred(preds, labels)\n",
    "    check_pred(preds2, labels)\n",
    "    check_pred(preds3, labels)\n",
    "    check_pred(preds4, labels)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 回归问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.cross_validation import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "boston = load_boston()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = boston['data']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y = boston['target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for train_index, test_index in kf:\n",
    "    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])\n",
    "    preds = xgb_model.predict(X[test_index])\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用 sklearn中网格搜索调参"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.grid_search import GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "xgb_model = xgb.XGBRegressor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],\n",
    "                               'n_estimators': [50, 100, 200]}, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 9 candidates, totalling 27 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    3.3s finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,\n",
       "       learning_rate=0.1, max_delta_step=0, max_depth=3,\n",
       "       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,\n",
       "       objective='reg:linear', reg_alpha=0, reg_lambda=1,\n",
       "       scale_pos_weight=1, seed=0, silent=True, subsample=1),\n",
       "       fit_params={}, iid=True, n_jobs=1,\n",
       "       param_grid={'n_estimators': [50, 100, 200], 'max_depth': [2, 4, 6]},\n",
       "       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=1)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.59848792071667345"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': 4, 'n_estimators': 100}"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 自定义损失函数, XGBClassifier和XGBRegressor都支持"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def objective_ls(y_true, y_pred):\n",
    "    grad = y_pred - y_true\n",
    "    hess = np.ones(len(y_true))\n",
    "    return grad, hess"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for train_index, test_index in kf:\n",
    "    xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(X[train_index], y[train_index])\n",
    "    preds = xgb_model.predict(X[test_index])\n",
    "    labels = y[test_index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "iris = load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "classifier = xgb.XGBClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,\n",
       "       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,\n",
       "       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,\n",
       "       objective='multi:softprob', reg_alpha=0, reg_lambda=1,\n",
       "       scale_pos_weight=1, seed=0, silent=True, subsample=1)"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classifier.fit(iris['data'], iris['target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "matplotlib.use('Agg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from matplotlib.axes import Axes\n",
    "from graphviz import Digraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEWCAYAAACdaNcBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHUlJREFUeJzt3XuUVPWZ7vHvK4IaTSSiYFwHaWK8ZjJqjs4BSbRhoqOZ\n5T3M0mC8G28YkPESc4w4ExyCF5aaeLxGiDkSoxkRnMSZqHSh4mAyTiDEkKMYMXgMOAgKelBu7/mj\ndpOy6aarqV3sen/1fNbaq2tX7dr1vL3h7d1vXdrcHRERSct2RQcQEZH8qbmLiCRIzV1EJEFq7iIi\nCVJzFxFJkJq7iEiC1NylJmb2npk9Y2bPZl8Pz2m/483slDz2tYXHuMDMvlLPx6gyR91rleazfdEB\nJLw/uPuRRYfYGu5+b9EZROpFzV1qZZ1eaTYU+J/ATpT/nT0F/JO7b8hu/zJwNdAL2AjcAtzq7oPN\n7BrgXOA9M9sP+HfgK+5+WcX+xwMLgBeBB4BVwK/d/TozOxCYAOya7f/XwLfd/f0OGccDC9z9UTNb\nAPwQ+FugL3Aj8H+BbwGfBH7l7qOz+y3IsrfnXw9McvcnstsN+Abwd8AG4P8B17r7f5jZoMq8wJrK\nWt39u2Z2ADAJ6Ef5t+ufA4dk37//zB7/pux+uwJzgUvdfWP2+C3ARGAvYGfgTeBb7v5bM+sLfBcY\nnB2Xt4Fr3P3Vzo6jBObuWrRs9QK8B8wC2rKvhwEHAA8DO1Rs93XKzQlgeLb9btl6b+AOYEXF9tcB\np2SXjwJu7/C444FTgEHAh8Bnsuv3AP4F6Fux7XHAjzrJPr7iMdYCl2SXdwZ+D9wJWHbdD4GjssuL\ngO8DvbP13YCngSOz9W8BtwK9svVBwC+BfTvm7aTWPYD5wF9U3H4O8D7w+Wx9DeUfFu233wmMqsjy\nK+CQitv3pfzDDeDRDvveByhVHistaSw6c5da/cHdR1ReYWY3AJ8BnsjOYgGc8hkuwHnA5e6+AsDd\n15nZGODUyt1087i9Ki7/p7svyi6fBHwaeLTDY+/Wzf5Wuvv/yvK8b2aLgamedUDgBWA/YDawDhjr\n7uuz7VeY2TjgCuAZ4HR3/1z7jt399ex78jXgBx3ydqz1RGCKu/+24v5TzOz0im1WufuEivUZlH9g\nPpjV/yN3n1dx/1eA75jZvsBQ4PaK7w2Uf1M5EJiHJEPNXerBgX9w9xmVV5rZDhW3d/xQoy018w+A\nHTtc17/i8soOt93t7rd1eOyO9+/ovzqsrwOWVaxvoPwbRlcqX5ywoYtt2mvsmLejzj7wqfL70zHr\nGsrjLyj/0NvS/+u57n7yR3ZstpO7r+kmkwSjV8tIrTprytOAq8ysP4CZ9TKzf6I8hwa4B7jZzPpl\nt/cGbgb6dLHvl4EhZvaxbPvDKY9k2lU205nA18zs05t2YnZxtv8t6ayhbuxi297AZDPrk+1/D2Ay\n0P4E7aNmNsHMtstuHwhcQ/nMumPeTTGzr9Oz/J+tyH8mcEQ3WdvNAM4ys0Mr7n+gmV2bncF/3MyO\nq7jtCODfOpzJSwJ05i612qzRuPvvzOxK4H9nDXAn4KfuflN2+7NmdiPw06yxr6P8BOLxFbv5HfBt\nM/uTuz9vZt8F5pjZ+5SfiLyvswzuvixrhrea2ccpz89nAWO7yd6xji010DWUm+jPzWwnyj8EJrr7\nM9nt3yEb0ZjZBsoz9jHu/vvsCdWO++5Y62nATWa2O+Wm/xjws2qyuftbZjYSmJT90OkN/Am4Nttk\nJHCjmV1N+YfpG8DIivGTJMJ0TEV6xswWVM7URRqRxjIiPaczIml4OnMXEUmQztxFRBKk5i4ikiA1\ndxGRBDXMSyHNTMN/EZGt4O6bvU+hoc7ci/4shlqW8ePHF56h2WuInj+FGqLnj1hDVxqquUe2ePHi\noiPULHoN0fND/Bqi54c0agA1dxGRJKm55+Tss88uOkLNotcQPT/EryF6fkijBmigNzGZmTdKFhGR\nKMwMb/QnVCMrlUpFR6hZ9Bqi54f4NUTPD2nUAGruIiJJ0lhGRCQwjWVERJqImntOUpjTRa8hen6I\nX0P0/JBGDaDmLiKSJM3cRUQC08xdRKSJqLnnJIU5XfQaoueH+DVEzw9p1ABq7iIiSdLMXUQkMM3c\nRUSaiJp7TlKY00WvIXp+iF9D9PyQRg2g5i4ikiTN3EVEAtPMXUSkiai55ySFOV30GqLnh/g1RM8P\nadQAau4iIknSzF1EJDDN3EVEmoiae05SmNNFryF6fohfQ/T8kEYNoOYuIpIkzdxFRALTzF1EpImo\nueckhTld9Bqi54f4NUTPD2nUAGruIiJJ0sxdRCQwzdxFRJqImntOUpjTRa8hen6IX0P0/JBGDaDm\nLiKSJM3cRUQC08xdRKSJqLnnJIU5XfQaoueH+DVEzw9p1ABq7iIiSdq+njs3synAYGAmcCLQG7jL\n3R/oYvt6xhERqYsBAwaxdOli1qxZw7nnnsuSJUswMyZNmsSQIUM47bTTWL9+PY8++uhm9124cCFz\n587lnHPOyTVTvc/c+wFjgf3c/Sjgi8BXzWy3zjd3LVq0aAm3LFv2OgBLly5lzJgxPPfcc9xzzz08\n+OCDbLfddjz88MN0ZsWKFYwaNYrVq1d3enst6tbczexSYChwCzABwN03AC9TPoNPTKnoADkoFR2g\nRqWiA+SgVHSAGpWKDpCD0lbfc/DgwQwZMoQrr7ySww8/nHPPPbfLbTdu3Mjll1/OuHHjtvrxtqRu\nzd3d7wCedfe/dvc3AMzsMOB9d19Wr8cVESnaTTfdxPz587nyyiu73Oa6665j7NixDBw4kHq8DLyu\nM/dKZvZZ4OvARV1vdTbQkl3uCxwCtGbrpexro663X9coebZ2nW5ub/R1urld61rvbr11K+5ffpXN\nnnvuyT777MOcOXMA2GeffXj77bdZsGABy5cv37TtjTfeyLRp03j++edZuXIly5cvp1evXowePXrT\nvgBaW1s3Wy+VSkydOhWAlpYWuuTudVuA6dnXg4D7gO23sK2Da9GiRUvABXd3v/vuu/2+++5zd/eV\nK1f6Mccc4+1OOukk78zs2bP9tttu6/S2amSPTcel3k+oupntDzwNHAA8aWazzOx/1PlxC1AqOkAO\nSkUHqFGp6AA5KBUdoEalogPkoLTV9zzvvPOYM2cOI0aMYOTIkUycOHHTbV29GrAeIxlosI8fgMbI\nsnVKfHREE1GJ2DWUiJ0f4tdQInZ+2LoarG5NuttH7uLjB9TcRURq1njNfZs9oVodvYlJROIZMGBQ\n0RE201AfP1CPJ3W31dLW1lZ4hmavIXr+FGqInn9ra1i6dHHR7XMzDdXcRUQkHw01c2+ULCIiUejz\n3EVEmoiae05S+Azo6DVEzw/xa4ieH9KoAdTcRUSSpJm7iEhgmrmLiDQRNfecpDCni15D9PwQv4bo\n+SGNGkDNXUQkSZq5i4gEppm7iEgTUXPPSQpzuug1RM8P8WuInh/SqAHU3EVEkqSZu4hIYJq5i4g0\nETX3nKQwp4teQ/T8EL+G6PkhjRpAzV1EJEmauYuIBKaZu4hIE1Fzz0kKc7roNUTPD/FriJ4f0qgB\n1NxFRJKkmbuISGCauYuINBE195ykMKeLXkP0/BC/huj5IY0aQM1dRCRJmrmLiASmmbuISBNRc89J\nCnO66DVEzw/xa4ieH9KoAdTcRUSSpJm7iEhgmrmLiDQRNfecpDCni15D9PwQv4bo+SGNGkDNXUQk\nSQ01cy86g0jKBgwYxNKli4uOITnrauZe1+ZuZlOAwcBLwKGAAze7+/ROtvXyzSJSH4a7c8UVV/Di\niy+ybt06rr32WgYMGMBVV13Fhg0b2Guvvbj//vvp06fPpnstXLiQuXPncs455xSYXbpS1BOq/YBT\ngRnufgRwAvDVOj9mQUpFB8hBqegANSoVHSAHpbrufcaMGfTr14+2tjaeeuopJkyYwDe/+U0eeeQR\nZs2axdFHH82dd965afsVK1YwatQoVq9eXdX+U5hXp1AD1LG5m9mlwFDgPnf/hZmNAV4HflSvxxSR\nLdtvv/246KKLANhxxx0ZOHAg3/72t+nbty8ABx10EGvXrgVg48aNXH755YwbN66wvLL16j2WedTd\nT6lYHwA8BBzv7u912FZjGZG6Ko9l2j3++OO89tprfOMb3wDgww8/5LzzzuOOO+5g11135dprr+XU\nU09l1apVzJs3jzFjxhQVXLagq7HM9vV/XBsErHD31e6+zMyeAvYHXtx887OBluxyX+AQoDVbL2Vf\nta51rW/9etmtt97KCy+8wI9//GMAnn76aSZPnszkyZPZddddufHGG5k2bRrPP/88K1euZPny5fTq\n1YvRo0eX95aNLlpbW7W+jddLpRJTp04FoKWlhS65e90WYDpwNPCdbH0H4F+AXTrZ1sEDL20NkKHZ\na4iev9414O7upVLJx40b5+3Wrl3rF1xwgS9YsMA7M3v2bL/ttts6va2jtra2qrZrZNFqyI4rHZd6\nn7m7uz9pZl8ys9nARmCidxjJiMi28dxzz3HiiSdy6KGHMnz4cDZs2ED//v2ZN28eixYtAuCoo45i\n/Pjxm+5T7h8STYO9zr0xsoikydSoE1TTzN3MDgfmUx6rjAF+5u6/zjciwGb5RCQnAwYMKjqCbEPV\nvhRykruvBS4F/hW4px5hOpsbRVna2toKz9DsNUTPX+8atsW7U1N4jXgKNUD1zX1nM/t74FXKr3Kp\n7h0NIiJSiKpm7mbWAuzn5TcjfRw41N2fyTWIPs9dRKTHav34gTeB/5ZdHpR3YxcRkXxV29z/ETjf\nzHoDV5jZqDpmCimFOV30GqLnh/g1RM8PadQA1Tf3Q4EP3H0dcB5wSf0iiYhIraqduc+k/K7SEdl6\nyd1bcw2imbuISI/VOnOfX7Gjc4A/5hVMRETyV/Xr3IHfmNkvgCHAZfWLFFMKc7roNUTPD/FriJ4f\n0qgBqv9UyGPdfWxdk4iISG6qnbk/DIxz9zfqFkQzdxGRHqv189w/ATxtZn8APqD8aY+ndHMfEREp\nSFUzd3c/1t33d/fj3P1kNfbNpTCni15D9PwQv4bo+SGNGqD6T4WcTvnzeHcCPg28pAYvItK4evx5\n7mY2HDjS3f8h1yCauYuI9Fitr3PfxN3bgKNySSUiInXR4+ZuZoMp/9EOqZDCnC56DdHzQ/waoueH\nNGqAns/cDVgFfL2eoUREpDbVvs79y+7+84r1Pdz9v3INopm7iEiPbdXr3M3sBMpn698xs8ptLwaO\nyzeiiIjkpbuZ+yeBvkDv7Osns2VinXOFk8KcLnoN0fND/Bqi54c0aoBuztzd/YcAZrbI3edsm0gi\nIlKramfuYyg/obqJu9+eaxDN3EVEeqzW17l/DBgGvAZ8gfKYRkREGlS1zf1I4Ax3fxz4GjC8fpFi\nSmFOF72G6Pkhfg3R80MaNUD1zX2H7O+n4u4fUj6TFxGRBtWTv6F6mbu/bmaDgO+5+wm5BtHMXUSk\nx7qauVfb3D8D3AUsBvYGLnb3V3MOqOYuItJDNT2h6u6L3P1LwFXufkzejT0FKczpotcQPT/EryF6\nfkijBqiyuZtZi5ndDbxnZpfUOZOIiNSo2rHMfcA+7j7czC4A9nP3K3MNorGMiEiP1fo6973J3sTk\n7vcCQ3LMJiIiOau2ub/bfsHMjPKHiUmFFOZ00WuInh/i1xA9P6RRA1Tf3J8CdjKz/YD7gX+rXyQR\nEanVFmfuZjbS3R/JLp8JHAbMdfdpuQfRzF1EpMe26nXuZjbL3Ud0vFyngOrswoABg1i6dHHRMUTC\nyO0PZPfwQaeYWcnMRpvZs2Y228xO7PoeHnhpa4AM8WtYtux1AGbOnMngwYP5zW9+A8Djjz/O0KFD\nGTZsGBdffDEdLVy4kKuvvnqz66OJPu+Nnh/SqAG6/xuq+5rZZMpPoFZedncfV8X++wHnAxPd/Ytm\n1geYaWaz3f2dmpJL0k444QTmzZu3aX3dunXMnj2bPn36cPrpp/PWW2/Rv39/AFasWMGoUaMYNmxY\nUXFFGk53zf0LFZdv7cmOzexSYCgwHxgJ4O5rzexBoBV4rCf7a3ytRQfIQWvRAT6icmR4yimnsHjx\nYoYNG8aQIUM2NfaNGzdy+eWXM27cOFasWFFU1Ny0trYWHaEm0fNDGjVAN2MZd3+9q6W7Hbv7HcBz\nwD8CyypuWgb0rym1NKWWlhaWLFnCAQccwLRp5ef0r7vuOsaOHcvAgQPRE/Iif9bdmXse3uajzXxP\n4K3ONz0baMku9wUO4c9nk6Xsa6Ou3xosb2fr84CxBefJ1kolFi9eDMDatWt56KGH2HvvvWltbeWS\nSy5h9OjRvPHGG8yYMYPnn3+eJUuW8O6773L44YdzxBFHbJqbtp+FRVlvv65R8jRb/srsjZKns3xT\np04Fyic8XXL3ui3AdGAfYFq23gd4AujbybYOHnhpa4AMKdSAt7v++ut9/vz5vnHjRh8+fLivWbPG\n3d2/973v+QMPPOCVZs+e7aNHj/bo2traio5Qk+j53ePVkP2foeNS7zN3d/dXzWyumT1TbuDc4kk+\nmdpadIActBYd4CPKb4Yufx0/fjzHHHMMvXv35uCDD2by5Mkf2dbd2XfffYuImavo897o+SGNGqDK\nDw7bFsqvc2+MLFIko1H+TYpEUMjr3HvOtDT5MmDAILZWCq9Pjl5D9PyQRg2wbZ5QrVrkM7ZSqRT+\n17kUahCRsoYayzRKFhGRKIKMZUREJA9q7jlJYU4XvYbo+SF+DdHzQxo1gJq7iEiSNHMXEQlMM3cR\nkSai5p6TFOZ00WuInh/i1xA9P6RRA6i5i4gkSTN3EZHANHMXEWkiau45SWFOF72G6Pkhfg3R80Ma\nNYCau4hIkjRzFxEJTDN3EZEmouaekxTmdNFriJ4f4tcQPT+kUQOouYuIJEkzdxGRwDRzFxFpImru\nOUlhThe9huj5IX4N0fNDGjWAmruISJI0cxcRCUwzdxGRJqLmnpMU5nTRa4ieH+LXED0/pFEDqLmL\niCRJM3cRkcA0cxcRaSJq7jlJYU4XvYbo+SF+DdHzQxo1gJq7iEiSNHMXEQlMM3cRkSai5p6TFOZ0\n0WuInh/i1xA9P6RRA6i5i4gkSTN3EZHANHMXEWkidW3uZjbFzEpm9ldmttDMTuhmey01Lnvu2QLA\nunXrOP/88xk2bBhHHHEE06ZNA2D48OGMGDGCESNG8MQTT3zk+x991hg9P8SvIXp+SKMGgO3rvP9+\n7t4KYGYTu9888limBLQWnAGWLSv/dvbKK6/Qq1cv5syZw+rVqznzzDMZOXIkn/rUpzY1ehFJV91m\n7mZ2KXA98Jy7n2xmZwEr3X1mF9t77ObeKIz2Y3r22Wcza9YsVq1axWOPPcYuu+zCGWecwV577cWR\nRx7J9ddfX2xUEanZNp+5u/sdwLPufnK9HkO6NnPmTHbffXf++Mc/smjRIiZMmMD+++/PSy+9xKxZ\ns3B3pk+fXnRMEamTeo9leuhsoCW73Bc4hD+POkrZ10Zdv7VB8pbdf//9nHHGGQDsvvvutLS0MG3a\nNC688EIABg8ezPTp0zn55PLP3lKpxLx58xg7duymdYDW1tYw69Hzt2ttbW2YPM2WvzJ7o+TpLN/U\nqVMBaGlpoUvuXrcFmF5x+SzghC1s6+CBl7YGyOBePqTud911l0+ZMsXd3T/44AM/7rjj/Oijj/YP\nP/zQ3d3Hjx/vjz32mFdqa2vzyKLnd49fQ/T87vFqyP7Pb9ZT6/o6dzN71N1PyS5r5r5NlGfu69ev\nZ/To0bz88sts2LCBq666io0bNzJx4kT69OnDsGHDuOGGG4oOKyI16mrm3lBvYlJzz8Ofn1AVkfTp\nTUx1Vyo6QM2iv743en6IX0P0/JBGDdBwT6hu9sNHemjAgEFFRxCRBtBQY5lGySIiEoXGMiIiTUTN\nPScpzOmi1xA9P8SvIXp+SKMGUHMXEUmSZu4iIoFp5i4i0kTU3HOSwpwueg3R80P8GqLnhzRqADV3\nEZEkaeYuIhKYZu4iIk1EzT0nKczpotcQPT/EryF6fkijBlBzFxFJkmbuIiKBaeYuItJE1NxzksKc\nLnoN0fND/Bqi54c0agA1dxGRJGnmLiISmGbuIiJNRM09JynM6aLXED0/xK8hen5IowZQcxcRSZJm\n7iIigWnmLiLSRNTcc5LCnC56DdHzQ/waoueHNGoANXcRkSRp5i4iEphm7iIiTUTNPScpzOmi1xA9\nP8SvIXp+SKMGUHMXEUmSZu4iIoFp5i4i0kTU3HOSwpwueg3R80P8GqLnhzRqADV3EZEkaeYuIhKY\nZu4iIk1EzT0nKczpotcQPT/EryF6fkijBlBzz828efOKjlCz6DVEzw/xa4ieH9KoAdTcc/POO+8U\nHaFm0WuInh/i1xA9P6RRA6i5i4gkSc09J4sXLy46Qs2i1xA9P8SvIXp+SKMGaLCXQhadQUQkos5e\nCtkwzV1ERPKjsYyISILU3EVEElR4czezG8xstpm1mdnQovNUy8y+bGa/NbNZZvYTMzswuzzLzL5v\nZr2KztgZMzvBzF4zs7/M1jvN3ajHpWP+7Lq2ihqOy65ryPwAZnZzlus5Mzs24DGozP832XVhjoGZ\n7WRmP87yP2tmR0Q7BtXYvsgHN7MRwE7ufpSZfQKYbmbHuPuGInNV6XDgNHf/LYCZ/Qw4w93fNLOL\ngfOAe4oM2Bl3n2lmh1RcdTMdcpvZIhr0uHTMb2a9gT+5+1crrmvYf1dmdiLwtrsPN7MdgaeAdwhy\nDDrk3wl40sxmEegYAHsCt7n7XDM7ELgM2Jsgx6BaRZ+5/zVwH4C7rwJKwOeKDNQDhwE3m9nPsrPI\n99z9zey2e4Gji4vWLYNNjbGz3CNo7ONS+cqAg4HPZ2dc47PrGvnf1cvAXQDu/gHwFvB+oGNQmX8N\nsAQ4FPjvUY6Bu7+WNfabgF8BU4h1DKpSdHPvByyrWF8G9C8oS0+d5e7HAhdQPvt9q/0Gd19P8d/b\navRj89y9iHVcfg981t1HANuZ2SnAbjRofndf6O4rAczseOAFAh2DTvL/O/A74KAox6Cdu19J+eRg\nErC84vqGPgbVKroBLQf2qFjfk4p/6I3M3VdkX98EVlPODoCZbQ9E+NXtbSq+/xW5P3I9DXxc3P29\nil+Tf0L5LLLh85tZK/AF4BYCHoP2/O5+e7RjYGYHZL+14u6vAq8B+1XcHuIYdKfo5t4GnANgZrsC\nXwQWFJqoCmZ2sZmNyi7vCewC7JBdhvLZ/JNF5auWu68DPtYh9y+AWQQ4Llb2pJn1ya76O+A/aPD8\nZnYUcLy7X52dJYY6BpX5zWy7gMfgSOBMADPrCwwE1kQ6BtUo9AlVd3/azL5kZiXAgWuCPFkxBfiR\nmZ0PrAfGUs7/oJkZ8FJ2XaOqfOfaNXTI7e4bzOzoBj4uDuDubma3AyUzWwvMcfcZAI2a38y+AMwA\nfm1mbZTzXUaQY9BF/ukEOgbAD4B7sxO0DcDVwAcEOQbV0jtURUQSVPRYRkRE6kDNXUQkQWruIiIJ\nUnMXEUmQmruISILU3EVEEqTmLkkzs0FmtqTiE/9GFp1JZFso9E1MItvII+4+rugQItuSmrs0g83+\nviRA9pndM4CdgWnufm/2MbzfBf4C2BFYRPndxquy6/+K8rsVf0n53b1fBMZQ/giKUyl/RsmFwCDg\nPeAhd3+kbpWJdEHvUJWkmdkg4DngleyqO9ubbfYZ3T9w95FmtpO7rzGzW4An3f1fs20+R/kt6n8L\nrHL3u7PrLwQ+Qdbk3f1YM9sZmE35B0G7fsCR7v5u3YsVqaDmLknLmvuYrsYyZnYY8GXgJ+7+f8zs\nMXc/qZPt/tndT61Y3w54BLgNGOruk8zs88BX3P1bFdv1cfe1OZcl0i09oSrNoKuxzHaUPw/+BmBi\ndvUSMzuhYpuDs7/W80szO6Pi7mdR/ix2A9Zk170MHG9me2X3HQT8NM9CRKqlmbs0g65+Pe1P+S/t\n7Al8P7vuauAWMxtHuXG/DPw95T/IMsnMzs2ufxG4ivLMvfwg7u9l45oHzcyBtcBF+Zcj0j2NZURE\nEqSxjIhIgtTcRUQSpOYuIpIgNXcRkQSpuYuIJEjNXUQkQWruIiIJUnMXEUnQ/wfKySHfSqxXLQAA\nAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1092ede10>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ax = xgb.plot_importance(classifier)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "g = xgb.to_graphviz(classifier, num_trees=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 交叉验证 StratifiedKFold 和xgb.cv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.cross_validation import StratifiedKFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = load_digits(2)['data']\n",
    "y = load_digits(2)['target']\n",
    "dm = xgb.DMatrix(X, label=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'multi:softmax', 'num_class': 3}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "seed = 1987"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "nfolds = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed, stratified=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
